rm(list=ls())
gc()
require(foreign)
require(ineq)
require(dplyr)
library(Hmisc)
require(corrplot)
require(systemfit)
require(cem)
require(BayesTree)
require(stargazer)
require(texreg)
require(plotrix)

setwd(paste(substr(getwd(),1,regexpr("Dropbox",getwd())[1]-1),"Dropbox/coauthors/AMT_Bisbee_Larson/bisbee_larson_replication_final",sep=""))
source("./tools/leg2.R")

###################################################################################
##
##  File Name:    descriptive_results.R
##
##  Input Files:  cleaned-full.csv
##                summary_stats.dta
##                correlation-long.dta
##                final_data_clean.dta
##  Output Files: summary_stats.tex             (Table 2)
##                equiv-real-sn-email.pdf       (Figure 1)
##                SI-equiv-all.pdf              (SI Figure 2)
##                correlation_fig.pdf           (Figure 3)
##                SI_donations_by_tie.pdf       (SI Figure 4)
##
##  Purpose:  This file uses several different pre-cleaned data sources to generate summary
##            statistics, including Table 2, Figure 1, and Figures 2 and 3 in the Supporting
##            Information.
##
##############################################################################################

specify_decimal <- function(x, k) format(round(x, k), nsmall=k)
startup <- function(x, out=NULL, ...){
  undo <- gsub("\\\\textasteriskcentered", "*", stargazer(x))#, ...))
  restar <- gsub("* * *", "${}^{***}$", undo, fixed = TRUE)
  restar <- gsub("* *", "${}^{**}$", restar, fixed = TRUE)
  restar <- gsub("* ", "${}^{*}$", restar, fixed = TRUE)
  restar <- gsub("\\textbackslash","\\",restar,fixed = TRUE)
  restar <- gsub("\\ textit\\{","\\textit{",restar,fixed = TRUE)
  restar <- gsub("\\}","}",restar,fixed = TRUE)
  if(!is.null(out)) cat(restar, file = out, sep="\n")
  restar
}
getwd()


######################################################################
# Equivalency Analysis: Figure 1 and SI Figure 1
######################################################################
equiv <- read.csv("./descriptive/cleaned-full.csv",stringsAsFactors = F)
which(colnames(equiv) == "topsn_weakest")
for(i in 107:116) {
  equiv[,i] <- as.numeric(as.character(equiv[,i]))
}

# Is treatment random (or is there evidence of priming?)
ties <- c("clique","weakest","weak","strong","strongest")
type <- c("topsn","topem","othersn2","offline","fb","tumblr","inst","li","twitter","gplus","othersn","gmail","apple","outlook","yahoo","windows","otherem")

betas.sn <- sapply(c(paste(type[1],ties,sep="_")), function(x) summary(lm(equiv[,x]~equiv$online))$coefficient[2,1])
ses.sn <- sapply(c(paste(type[1],ties,sep="_")), function(x) summary(lm(equiv[,x]~equiv$online))$coefficient[2,2])
betas.em <- sapply(c(paste(type[2],ties,sep="_")), function(x) summary(lm(equiv[,x]~equiv$online))$coefficient[2,1])
ses.em <- sapply(c(paste(type[2],ties,sep="_")), function(x) summary(lm(equiv[,x]~equiv$online))$coefficient[2,2])
betas.oc <- sapply(c(paste(type[3],ties,sep="_")), function(x) summary(lm(equiv[,x]~equiv$online))$coefficient[2,1])
ses.oc <- sapply(c(paste(type[3],ties,sep="_")), function(x) summary(lm(equiv[,x]~equiv$online))$coefficient[2,2])
betas.off <- sapply(c(paste(type[4],ties,sep="_")), function(x) summary(lm(equiv[,x]~equiv$online))$coefficient[2,1])
ses.off <- sapply(c(paste(type[4],ties,sep="_")), function(x) summary(lm(equiv[,x]~equiv$online))$coefficient[2,2])
betas.fb <- sapply(c(paste(type[5],ties,sep="_")), function(x) summary(lm(equiv[,x]~equiv$online))$coefficient[2,1])
ses.fb <- sapply(c(paste(type[5],ties,sep="_")), function(x) summary(lm(equiv[,x]~equiv$online))$coefficient[2,2])
betas.out <- sapply(c(paste(type[14],ties,sep="_")), function(x) summary(lm(equiv[,x]~equiv$online))$coefficient[2,1])
ses.out <- sapply(c(paste(type[14],ties,sep="_")), function(x) summary(lm(equiv[,x]~equiv$online))$coefficient[2,2])


######################################################################
# equiv-real-sn-email.pdf: Figure 1
######################################################################
pdf("./1_Figures/equiv-real-sn-email.pdf")
par(mar=c(5,7,3,1))
mins <- min(c(betas.off-2*ses.off,betas.fb-2*ses.fb,betas.em-2*ses.em))
maxs <- max(c(betas.off+2*ses.off,betas.fb+2*ses.fb,betas.em+2*ses.em))
plot(betas.off,1:5,xlim=c(mins,maxs),xlab="Treatment Effect",ylab="",yaxt='n',pch=15,col="black",bty='n',xaxt='n',
     ylim=c(1,6),main="Treatment Effect on Frequency of Interaction")
mtext(text=capitalize(ties),side = 2,at = 1:5,las=2,line=1)
axis(1,at = seq(round(mins),round(maxs),by=abs(round(maxs) - round(mins))/4),labels=seq(round(mins),round(maxs),by=abs(round(maxs) - round(mins))/4),
     col=rgb(0,0,0,.3))
segments(betas.off-1.65*ses.off,1:5,betas.off+1.65*ses.off)
segments(0,0,0,5.25,lty=3,col=rgb(0,0,0,.5))
segments(betas.fb-1.65*ses.fb,1:5,betas.fb+1.65*ses.fb)
segments(betas.out-2*ses.out,1:5,betas.out+2*ses.out,col=rgb(0,0,0,.4))
points(betas.out,1:5,pch=19,col=rgb(0,0,0,.5))
points(betas.fb,1:5,pch=21,bg="white")
legend("top",legend=c("Real World","Email","Social Network"),pch=c(15,19,21),col=c("black",rgb(0,0,0,.5),"black"),pt.bg="white",lty=1,bty='n')
dev.off()


######################################################################
# SI-equiv-all.pdf: Supporting Information Figure 2
######################################################################
pdf("./1_Figures/SI-equiv-all.pdf")
nf <- layout(matrix(c(1,2,3,4,5,6),6,1,byrow=T),heights=c(.8,1.4,1.4,1.4,1.4,1.4))
#layout.show(nf)
par(mar=c(0,4,1,3))
plot(1:4,rep(0,4),type='n',xaxt='n',yaxt='n',xlab="",ylab="",bty='n',xlim=c(.5,4.5),
     main="Online Elicitation Effect on Actual Interaction via:")
text(x = 1:4,y=rep(-.5,4),labels = c("Real World","Email","Online Community","Social Network"),las=2,cex=1.3)
par(mar=c(.5,4,.5,3))
for(i in 1:length(betas.sn)) {
  mins <- min(c(betas.off[i]-2*ses.off[i],
                betas.fb[i]-2*ses.fb[i],
                betas.em[i]-2*ses.em[i],
                betas.sn[i]-2*ses.sn[i]))
  maxs <- abs(mins)
  
  plot(1:4,rep(0,4),ylim=c(mins,maxs),xlim=c(0.5,4.5),type='n',xlab='',ylab=capitalize(ties[i]),xaxt='n',bty='n')
  if(i == 1) {
    text(x = 4.1,y=maxs+.05,labels = "Facebook",las=2,col=rgb(139,157,195,maxColorValue = 255),cex=.8)
  }
  segments(3,betas.oc[i] - 1.65*ses.oc[i],3,betas.oc[i] + 1.65*ses.oc[i])
  segments(2,betas.em[i] - 1.65*ses.em[i],2,betas.em[i] + 1.65*ses.em[i])
  segments(4,betas.sn[i] - 1.65*ses.sn[i],4,betas.sn[i] + 1.65*ses.sn[i])
  segments(4.1,betas.fb[i] - 1.65*ses.fb[i],4.1,betas.fb[i] + 1.65*ses.fb[i],lty=2,col=rgb(139,157,195,maxColorValue = 255))
  segments(1,betas.off[i] - 1.65*ses.off[i],1,betas.off[i] + 1.65*ses.off[i])
  abline(h = 0)
  points(3,betas.oc[i],pch=19)
  points(2,betas.em[i],pch=19)
  points(4,betas.sn[i],pch=19)
  points(4.1,betas.fb[i],pch=21,col=rgb(139,157,195,maxColorValue = 255),bg="white")
  points(1,betas.off[i],pch=19)
}
dev.off()

######################################################################
# summary_stats.tex: Table 2
######################################################################
dat <- read.dta("./descriptive/summary_stats.dta")

vars <- colnames(dat %>% select(-c(online,introvert)))
lookup <- cbind(vars,c("Education","Age","Industry","Occupation","Race","Country","Family","Gender","Income","Marital"),
                c("\\textit{6 = HS, 7 = College}","\\textit{3 = 25-34, 4 = 35-44}","","","","","","\\textit{1 = male, 2 = female}",
                  "\\textit{4 = $40-50k, 5 = $50-60k}",""))
results <- matrix(NA,nrow=20,ncol=4)
i = 1
for(v in vars) {
  tmp <- t.test(dat[which(dat$online == 1),v],dat[which(dat$online == 0),v])
  stars <- ifelse(tmp$p.value < 0.01,"***",
                  ifelse(tmp$p.value < 0.05,"**",
                         ifelse(tmp$p.value < .1,"*","")))
  results[i,1] <- lookup[which(lookup[,1] == v),2]
  results[i+1,1] <- lookup[which(lookup[,1] == v),3]
  results[i,2] <- specify_decimal(tmp$estimate[2],2)
  results[i+1,2] <- paste("(",specify_decimal(sd(dat[which(dat$online == 0),v],na.rm=T),2),")",sep="")
  results[i,3] <- specify_decimal(tmp$estimate[1],2)
  results[i+1,3] <- paste("(",specify_decimal(sd(dat[which(dat$online == 1),v],na.rm=T),2),")",sep="")
  results[i,4] <- paste(specify_decimal(tmp$estimate[2] - tmp$estimate[1],2),stars,sep="")
  results[i+1,4] <- paste("(",specify_decimal(abs((tmp$estimate[2] - tmp$estimate[1])/tmp$statistic),2),")",sep="")
  i = i+2
}

results <- rbind(results,cbind("N","233","260","493"))

startup(results,out = "./2_Tables/summary_stats.tex") # Need to add sample sizes manually

######################################################################
# correlation_fig.pdf: Figure 3
######################################################################
dat <- read.dta("./descriptive/correlation-long.dta")

defs <- c("Donate (% $100)","Job Search","Contrib. to Entrep.","Garner Conts.",
          "Pers. Gain (% $100)","Group Gain (% $100)","Political Hom.","Religious Hom.",
          "Education Hom.","Class Hom.","Pers. Crisis","Pers. Success",
          "Prof. Crisis","Prof. Success","Pref. Interaction",
          "Tie Strength (synth.)","Tie Strength","Least in Common",
          "Most in Common")

abbs <- colnames(dat)

colnames(dat) <- defs
dat <- dat[,-c(which(colnames(dat) %in% c("Donate (% $100)","Tie Strength (synth.)")))]
ords <- match(c("Political Hom.","Religious Hom.","Education Hom.","Class Hom.","Least in Common","Most in Common",
                "Contrib. to Entrep.","Garner Conts.","Pers. Gain (% $100)","Group Gain (% $100)",
                "Pers. Crisis","Pers. Success","Pref. Interaction","Tie Strength",
                "Job Search","Prof. Crisis","Prof. Success"),colnames(dat))
dat <- dat[,ords]
M <- cor(dat,use="complete")
M[which(M < -.9)] <- 0
M <- abs(M)

trust <- 3:5
homo <- 7:10
intim <- 11:14
strcom <- 16:19
oth <- c(1,2,6,15)
diag(M) <- 1

pdf("./1_Figures/correlation_fig.pdf",width = 9,height=5)
par(xpd=F)
par(mar=c(0,0,0,0))
corrplot(M,mar = c(0,3,0,0),method="shade",
         cl.lim = c(0, 1),type="lower",tl.cex=.5) #order = "alpha"
par(xpd=T)
segments(-4,.6,-4,3.4,lwd=2,col="grey60",cex=.8)
segments(-4,3.4,-3.65,3.4,lwd=2,col="grey60",cex=.8)
segments(-4,.6,-3.65,.6,lwd=2,col="grey60",cex=.8)
text(-4.5,2,"Prof.",srt=90,col="grey60",cex=.8)

segments(-4,3.6,-4,7.4,lwd=2,col="grey60",cex=.8)
segments(-4,7.4,-3.65,7.4,lwd=2,col="grey60",cex=.8)
segments(-4,3.6,-3.65,3.6,lwd=2,col="grey60",cex=.8)
text(-4.5,5.5,"Intimacy",srt=90,col="grey60",cex=.8)

segments(-4,7.6,-4,11.4,lwd=2,col="grey60",cex=.8)
segments(-4,11.4,-3.65,11.4,lwd=2,col="grey60",cex=.8)
segments(-4,7.6,-3.65,7.6,lwd=2,col="grey60",cex=.8)
text(-4.5,9.5,"Reciprocity",srt=90,col="grey60",cex=.8)

segments(-4,11.6,-4,17.4,lwd=2,col="grey60",cex=.8)
segments(-4,17.4,-3.65,17.4,lwd=2,col="grey60",cex=.8)
segments(-4,11.6,-3.65,11.6,lwd=2,col="grey60",cex=.8)
text(-4.5,14.5,"Homophily",srt=90,col="grey60",cex=.8)
par(xpd=F)
dev.off()

########################################################################################
# SI_donations_by_tie.pdf: Supporting Information Figure 3
########################################################################################
dat <- read.dta("./tools/final_data_clean.dta")

pdf("./1_Figures/SI_donations_by_tie.pdf")
plot(0,xlim=c(0,100),ylim=c(0,.1),type='n',bty='n',xaxt='n',yaxt='n',xlab="Donated Amount ($)",ylab="",main="Donation Amount by Tie")
axis(1,at=seq(0,100,by=10),cex.axis=.8)
sapply(1:10,function(i) rect(hist(c(dat$donate_weakest,dat$donate_weak),plot=F)$breaks[i],0,
                             hist(c(dat$donate_weakest,dat$donate_weak),plot=F)$mids[i]-1,hist(c(dat$donate_weakest,dat$donate_weak),plot=F)$density[i],
                             col=rgb(0,0,0,.4),border=rgb(0,0,0,.4)))
sapply(1:10,function(i) rect(hist(c(dat$donate_strongest,dat$donate_strong),plot=F)$breaks[i]+2,0,
                             hist(c(dat$donate_strongest,dat$donate_strong),plot=F)$mids[i]+1,hist(c(dat$donate_strongest,dat$donate_strong),plot=F)$density[i],
                             col=rgb(0,0,0,.2),border=rgb(0,0,0,.2)))
sapply(1:10,function(i) rect(hist(dat$donate_you,plot=F)$breaks[i]+4,0,
                             hist(dat$donate_you,plot=F)$mids[i]+3,hist(dat$donate_you,plot=F)$density[i],
                             col=rgb(1,1,1,.4),border="black"))
legend("right",legend=c("Weakest & Weak","Strong & Strongest","Self"),fill=c(rgb(0,0,0,.5),rgb(0,0,0,.2),rgb(1,1,1,.1)),
       border=c(NA,NA,"black"),bty='n',cex=.9)
dev.off()